Objective: Summary of the demographics and soil characteristics of the Rwanda long term soil health study.
Add in details and links on study methodology here.
library(knitr)
library(ggplot2)
library(stringr)
suppressMessages(library(dplyr))
library(sp)
suppressMessages(library(dismo))
suppressMessages(library(stargazer))
library(leaflet)
library(XML)
suppressMessages(library(maptools))
wd <- "/Users/mlowes/drive/soil health study/data/rw baseline"
dd <- paste(wd, "data", sep = "/")
od <- paste(wd, "output", sep="/")
md <- paste(wd, "maps", sep="/")
drive <- "~/drive/r_help/4_output/statistical_test_outputs"
#load data:
# This data is being drawn from the Soil lab repository. It has the baseline data with it
d <- read.csv(paste(dd, "Rwanda_shs_commcare_soil data_final.csv", sep="/"), stringsAsFactors=FALSE)
I’m using the merged data from Emmanuel for now but I will need to go back and download the demographic data fresh from Commcare and replicate the merge process using “Identifiers with SSN_final” provided by Emmanuel.
Included code to pair demographic and soil data?: Not yet
Now let’s start cleaning the demographic variables
# take out weird CommCare stuff
d[d=="---"] <- NA
# take out demographic and text_final_question from variable names
#to.change <- c("text_final_questions.", "intro_champ_echantillon.",
# "demographic_info.", "other_inputs_", "crop1_15b_inputs.", "crop2_15b_inputs.",
# "^15b.", "historical_intro.")
#names(d) <- tapply(to.change, function(x) { gsub(x, "", names(d))})
names(d) <- gsub("text_fil_questions", "", names(d))
names(d) <- gsub("intro_champ_echantillon", "", names(d))
names(d) <- gsub("demographic_info", "", names(d))
names(d) <- gsub("other_inputs_", "", names(d))
names(d) <- gsub("crop1_15b_inputs", "", names(d))
names(d) <- gsub("crop2_15b_inputs", "", names(d))
names(d) <- gsub("^15b", "", names(d))
names(d) <- gsub("historical_intro", "", names(d))
names(d)[names(d)=="field_dim"] <- "field_dim1"
names(d)[names(d)=="v51"] <- "field_dim2"
Take care of demographic data formatting issues
# deal with names and drop unnecessary variables
d <- d %>%
dplyr::select(-c(rownumber, infoformid, introductiond_accept, photo,
infocompleted_time,
enumerator_me, contains("phone"), farmer_me, farmersurme, farmerme,
d_respondent, additiolsamplepackedandsenttol, additiolsamplerequestedfromlab,
datedryingcompleteifnecessary, driedindistrictifnecessary, senttohqyo,
collectedindistrictyo, excessstoredathq_, receivedathq_,dateofinitialdryingifnecessary,
samplecollectedinfieldyo, field_des, samplewetordry)) %>%
rename(
female = sex,
age = age_cultivateur,
own = d_own,
client = d_client) %>%
mutate(
female= ifelse(female=="gore", 1,0),
field.size = field_dim1*field_dim2
)
d$total.seasons <- apply(d[, grep("d_season_list", names(d))], 1, function(x) {
sum(x, na.rm=T)})
Fix some more variable names:
names(d)[names(d)=="field_kg_fert1_1"] <- "field_kg_fert1_15b"
names(d)[names(d)=="field_kg_fert2_1"] <- "field_kg_fert2_15b"
names(d)[names(d)=="field_kg_compost"] <- "field_kg_compost_15b"
Recode variables to numeric:
# recode to numeric
varlist <- c("client", "own", "crop1_15b_seedkg", "crop1_15b_yield", "crop1_15b_yield_",
"crop2_15b_seedkg", "crop2_15b_yield", "crop2_15b_yield_", "field_kg_fert1_15b",
"field_kg_fert2_15b", "field_kg_compost_15b", "d_lime_15b", "kg_lime_15b")
# check that there aren't values hidden in the character variables
#apply(d[,varlist], 2, function(x){table(x, useNA='ifany')})
# recode characters to numerics
d[, varlist] <- sapply(d[,varlist], as.numeric)
# divide out GPS coordinates
# http://rfunction.com/archives/1499
# replace the blank gps_pic_guide with info
d <- cbind(d, str_split_fixed(d$gps_pic_guid, " ", n=4))
names(d)[106:109] <- c("lat", "lon", "alt", "precision")
d[,c("lat", "lon", "alt", "precision")] <- sapply(d[,c("lat", "lon", "alt", "precision")],
function(x){as.numeric(as.character(x))})
Check out missing data in the district variable:
#table(d$district, useNA = 'ifany')
length(d[d$district=="",])
## [1] 109
# these are the sample for which we have soil data but not survey data drop these for now
d <- d[-which(d$district==""),]
Cleaning of soil data: Come back, check and clean the soil data before outputting to clean data set. Plot each of the soil variables to look for unrealistic values.
dim(d[is.na(d$m3.Ca),])
## [1] 11 109
d <- d[-which(is.na(d$m3.Ca)),]
summary(d[,c("m3.Ca", "m3.Mg", "pH", "Total.N", "Total.C")])
## m3.Ca m3.Mg pH Total.N
## Min. : 220.5 Min. : 38.26 Min. :4.547 Min. :0.05523
## 1st Qu.: 443.0 1st Qu.: 114.16 1st Qu.:5.021 1st Qu.:0.13596
## Median : 720.2 Median : 184.98 Median :5.539 Median :0.15552
## Mean : 876.4 Mean : 207.82 Mean :5.544 Mean :0.15717
## 3rd Qu.:1157.9 3rd Qu.: 270.26 3rd Qu.:6.009 3rd Qu.:0.17886
## Max. :3669.2 Max. :1008.93 Max. :7.211 Max. :0.24701
## Total.C
## Min. :0.8988
## 1st Qu.:1.7227
## Median :2.1118
## Mean :2.1096
## 3rd Qu.:2.3657
## Max. :4.1620
Let’s check out the rows for which we don’t have soil data and drop them as they won’t contribute to the full picture.
soilVars <- c("m3.Ca", "m3.Mg", "pH", "Total.N", "Total.C")
for(i in 1:length(soilVars)){
print(
ggplot(data=d, aes(x=as.factor(client), y=d[,soilVars[i]])) +
geom_boxplot() +
labs(x="Tubura Farmer", y=soilVars[i], title = paste("RW baseline soil - ", soilVars[i], sep = ""))
)
}
There are biologically predictable relationships between soil chemical characteristics. For instance, we expect Ca and Mg to move in the same direction and be positively correlated with pH. If we had Aluminum as an outcome, we’d expect pH to be negatively correlated with soluable aluminum. Let’s look quickly to confirm if those relationships are present:
ggplot(d, aes(x=m3.Ca, y=m3.Mg)) + geom_point() +
labs(x = "Calcium", y= "Magnesium", title="Calcium/Magnesium relationship")
ggplot(d, aes(x=pH, y=m3.Ca)) + geom_point() +
labs(x = "pH", y="Calcium", title = "pH and Calcium relationship")
ggplot(d, aes(x=pH, y=m3.Mg)) + geom_point() +
labs(x = "pH", y="Calcium", title = "pH and Magnesium relationship")
ggplot(d, aes(x=Total.C, y=Total.N)) + geom_point() +
labs(x = "Total Carbon", y="Total Nitrogen", title = "Carbon and Nitrogen relationship")
The soil characteristics are moving in the manner that is consistent with our understanding of soil chemical processes.
Save clean demographic and soil data to external file
write.csv(d, file=paste(paste(wd, "data", sep = "/"), "shs rw baseline.Rdata", sep = "/"))
save(d, file=paste(paste(wd, "data", sep = "/"), "shs rw baseline.Rdata", sep = "/"))
Produce a simple map of where our observations are
e <- d[!is.na(d$lon),]
ss <- SpatialPointsDataFrame(coords = e[, c("lon", "lat")], data=e)
map <- leaflet() %>% addTiles() %>%
setView(lng=rwanda$longitude, lat=rwanda$latitude, zoom=8) %>%
addCircleMarkers(lng=ss$lon, lat=ss$lat, clusterOptions = markerClusterOptions())
map
count <- d %>% group_by(district) %>%
dplyr::summarize(
t.count = sum(ifelse(client==1,1,0)),
c.count = sum(ifelse(client==0,1,0)),
total = n()
) %>% ungroup()
count <- as.data.frame(count)
write.csv(count, file=paste(od, "final rw sample breakdown.csv", sep="/"), row.names=F)
as.data.frame(count)
## district t.count c.count total
## 1 Gatsibo_LWH 58 59 117
## 2 Gatsibo_NLWH 87 81 168
## 3 Gisagara 46 46 92
## 4 Huye 114 115 229
## 5 Karongi 163 159 322
## 6 Kayonza 55 58 113
## 7 Mugonero 131 129 260
## 8 Nyamagabe 56 56 112
## 9 Nyamasheke 147 146 293
## 10 Nyanza 46 46 92
## 11 Nyaruguru 46 46 92
## 12 Rutsiro 166 166 332
## 13 Rwamaga 110 111 221
Let’s see how balanced our farmers are in terms of demographic variables. Tubura farmers were selected based on (list criteria) and control farmers in the same area tha fit the same criteria were also selected. No matching process has been performed to identify the control farmers that most closely resemble the Tubura farmers in the sample.
out.list <- c("female", "age", "hhsize", "own", "field.size",
"n_season_fert", "n_season_compost", "n_season_lime", "n_season_fallow", "n_seasons_leg_1", "n_seasons_leg_2", "m3.Ca", "m3.Mg",
"pH", "Total.C", "Total.N")
output <- do.call(rbind, lapply(out.list, function(x) {
out <- t.test(d[,x] ~ d[,"client"], data=d)
tab <- data.frame(out[[5]][[1]], out[[5]][[2]], out[3])
tab[,1:2] <- round(tab[,1:2],3)
names(tab) <- c(names(out[[5]]), "pvalue")
return(tab)
}))
# use p.adjust with bonferroni correction
output$pvalue <- p.adjust(output$pvalue, method="fdr")
rownames(output) <- out.list
output <- output[order(output$pvalue),]
output$pvalue <- ifelse(output[, 3] < 0.001, "< 0.001", round(output[, 3], 3))
colnames(output) <- c("Non-Tubura", "Tubura Client", "p-value")
print(kable(output))
| Non-Tubura | Tubura Client | p-value | |
|---|---|---|---|
| n_season_fert | 0.833 | 3.260 | < 0.001 |
| female | 0.649 | 0.493 | < 0.001 |
| age | 48.111 | 44.002 | < 0.001 |
| hhsize | 4.896 | 5.416 | < 0.001 |
| n_season_lime | 0.132 | 0.228 | < 0.001 |
| own | 0.956 | 0.932 | 0.024 |
| n_season_compost | 5.523 | 5.818 | 0.126 |
| n_season_fallow | 0.570 | 0.690 | 0.136 |
| Total.N | 0.158 | 0.156 | 0.138 |
| m3.Ca | 894.242 | 858.574 | 0.157 |
| m3.Mg | 211.917 | 203.756 | 0.157 |
| field.size | 602.096 | 667.258 | 0.214 |
| pH | 5.556 | 5.532 | 0.348 |
| Total.C | 2.120 | 2.099 | 0.348 |
| n_seasons_leg_1 | 2.130 | 2.233 | 0.374 |
| n_seasons_leg_2 | 3.352 | 5.398 | 0.453 |
#write table
write.csv(output, file=paste(od, "baseline balance.csv", sep="/"), row.names=T)
Demographic variables We are not well balanced along the main demographic variables we collected, sex, age and HH size. For the purposes of inference we can test some matching algorithms to improve the match between Tubura and control farmers.
Agricultural practice variables We are decently balanced along agricultural practice variables. Our course of action here is similiar to our options with the demographic variables.
Soil Variables We are balanced on the primary soil variables of interest betwen our Tubura farmers and the comparison farmers.
dist.output <- do.call(rbind, lapply(split(d, d$district), function(x) {
tab <- do.call(rbind, lapply(out.list, function(y) {
out <- t.test(x[,y] ~ x[,"client"], data=x)
tab <- data.frame(out[[5]][[1]], out[[5]][[2]], out[3])
tab[,1:2] <- round(tab[,1:2],3)
names(tab) <- c(names(out[[5]]), "pvalue")
#tab[,3] <- p.adjust(tab[,3], method="holm")
#tab[,3] <- ifelse(tab[,3] < 0.001, "< 0.001", round(tab[,3],3))
#print(tab)
return(tab)
}))
return(data.frame(district = unique(x$district), tab))
}))
rownames(dist.output) <- NULL
dist.output$variable <- rep(out.list,13)
# order variables
dist.output <- dist.output[, c(1, 5, 2:4)]
dist.output$pvalue <- p.adjust(dist.output$pvalue, method="fdr")
dist.output <- dist.output[order(dist.output$pvalue),]
dist.output$pvalue <- ifelse(dist.output$pvalue < 0.001, "< 0.001", round(dist.output$pvalue,3))
colnames(dist.output) <- c("District", "Varible", "Non-Tubura", "Tubura Client", "p-value")
print(kable(dist.output))
| District | Varible | Non-Tubura | Tubura Client | p-value | |
|---|---|---|---|---|---|
| 70 | Karongi | n_season_fert | 0.447 | 3.785 | < 0.001 |
| 102 | Mugonero | n_season_fert | 0.597 | 4.313 | < 0.001 |
| 182 | Rutsiro | n_season_fert | 1.072 | 4.175 | < 0.001 |
| 134 | Nyamasheke | n_season_fert | 1.507 | 5.068 | < 0.001 |
| 198 | Rwamaga | n_season_fert | 1.054 | 2.645 | < 0.001 |
| 54 | Huye | n_season_fert | 0.322 | 2.246 | < 0.001 |
| 22 | Gatsibo_NLWH | n_season_fert | 0.296 | 1.517 | < 0.001 |
| 118 | Nyamagabe | n_season_fert | 1.554 | 4.000 | < 0.001 |
| 136 | Nyamasheke | n_season_lime | 0.041 | 0.327 | < 0.001 |
| 86 | Kayonza | n_season_fert | 0.776 | 2.073 | < 0.001 |
| 194 | Rwamaga | age | 52.072 | 44.245 | 0.001 |
| 131 | Nyamasheke | hhsize | 4.685 | 5.701 | 0.003 |
| 65 | Karongi | female | 0.667 | 0.466 | 0.004 |
| 163 | Nyaruguru | hhsize | 4.717 | 6.217 | 0.004 |
| 45 | Gisagara | m3.Mg | 241.171 | 190.036 | 0.006 |
| 50 | Huye | age | 49.400 | 43.395 | 0.009 |
| 44 | Gisagara | m3.Ca | 1203.823 | 895.928 | 0.01 |
| 145 | Nyanza | female | 0.543 | 0.217 | 0.012 |
| 150 | Nyanza | n_season_fert | 0.152 | 1.326 | 0.019 |
| 18 | Gatsibo_NLWH | age | 46.210 | 39.897 | 0.019 |
| 17 | Gatsibo_NLWH | female | 0.556 | 0.322 | 0.021 |
| 2 | Gatsibo_LWH | age | 49.949 | 41.931 | 0.027 |
| 113 | Nyamagabe | female | 0.750 | 0.482 | 0.03 |
| 81 | Kayonza | female | 0.672 | 0.400 | 0.03 |
| 20 | Gatsibo_NLWH | own | 1.000 | 0.908 | 0.031 |
| 104 | Mugonero | n_season_lime | 0.023 | 0.176 | 0.031 |
| 193 | Rwamaga | female | 0.739 | 0.555 | 0.031 |
| 1 | Gatsibo_LWH | female | 0.492 | 0.241 | 0.035 |
| 179 | Rutsiro | hhsize | 4.855 | 5.608 | 0.046 |
| 99 | Mugonero | hhsize | 5.054 | 5.802 | 0.049 |
| 98 | Mugonero | age | 49.140 | 44.290 | 0.067 |
| 161 | Nyaruguru | female | 0.652 | 0.391 | 0.077 |
| 48 | Gisagara | Total.N | 0.154 | 0.142 | 0.079 |
| 47 | Gisagara | Total.C | 2.005 | 1.816 | 0.086 |
| 184 | Rutsiro | n_season_lime | 0.096 | 0.325 | 0.086 |
| 135 | Nyamasheke | n_season_compost | 5.521 | 6.544 | 0.089 |
| 59 | Huye | n_seasons_leg_2 | 5.278 | 3.667 | 0.096 |
| 138 | Nyamasheke | n_seasons_leg_1 | 1.658 | 2.327 | 0.101 |
| 166 | Nyaruguru | n_season_fert | 1.065 | 2.130 | 0.104 |
| 35 | Gisagara | hhsize | 4.326 | 5.348 | 0.113 |
| 6 | Gatsibo_LWH | n_season_fert | 1.475 | 2.552 | 0.121 |
| 51 | Huye | hhsize | 4.574 | 5.132 | 0.141 |
| 115 | Nyamagabe | hhsize | 4.696 | 5.482 | 0.141 |
| 41 | Gisagara | n_season_fallow | 0.087 | 0.500 | 0.184 |
| 43 | Gisagara | n_seasons_leg_2 | 3.283 | 2.370 | 0.184 |
| 148 | Nyanza | own | 1.000 | 0.913 | 0.197 |
| 196 | Rwamaga | own | 0.973 | 0.909 | 0.197 |
| 46 | Gisagara | pH | 5.945 | 5.759 | 0.213 |
| 27 | Gatsibo_NLWH | n_seasons_leg_2 | 3.889 | 3.046 | 0.22 |
| 38 | Gisagara | n_season_fert | 0.348 | 1.087 | 0.22 |
| 72 | Karongi | n_season_lime | 0.013 | 0.098 | 0.225 |
| 155 | Nyanza | n_seasons_leg_2 | 3.478 | 2.130 | 0.225 |
| 88 | Kayonza | n_season_lime | 0.707 | 0.527 | 0.226 |
| 201 | Rwamaga | n_season_fallow | 0.712 | 1.218 | 0.226 |
| 114 | Nyamagabe | age | 48.875 | 43.768 | 0.229 |
| 176 | Nyaruguru | Total.N | 0.170 | 0.161 | 0.238 |
| 205 | Rwamaga | m3.Mg | 293.168 | 273.526 | 0.298 |
| 146 | Nyanza | age | 50.152 | 44.717 | 0.335 |
| 33 | Gisagara | female | 0.609 | 0.435 | 0.339 |
| 66 | Karongi | age | 47.384 | 44.681 | 0.339 |
| 95 | Kayonza | Total.C | 2.556 | 2.400 | 0.343 |
| 82 | Kayonza | age | 46.500 | 42.345 | 0.351 |
| 64 | Huye | Total.N | 0.148 | 0.143 | 0.353 |
| 23 | Gatsibo_NLWH | n_season_compost | 3.420 | 2.632 | 0.366 |
| 92 | Kayonza | m3.Ca | 1863.601 | 1664.716 | 0.366 |
| 177 | Rutsiro | female | 0.627 | 0.542 | 0.375 |
| 183 | Rutsiro | n_season_compost | 7.602 | 8.163 | 0.396 |
| 190 | Rutsiro | pH | 5.342 | 5.261 | 0.396 |
| 11 | Gatsibo_LWH | n_seasons_leg_2 | 2.898 | 1.983 | 0.399 |
| 49 | Huye | female | 0.591 | 0.500 | 0.446 |
| 84 | Kayonza | own | 0.897 | 0.964 | 0.446 |
| 85 | Kayonza | field.size | 423.776 | 664.236 | 0.446 |
| 96 | Kayonza | Total.N | 0.187 | 0.180 | 0.446 |
| 108 | Mugonero | m3.Ca | 604.767 | 555.744 | 0.446 |
| 122 | Nyamagabe | n_seasons_leg_1 | 1.125 | 1.625 | 0.446 |
| 168 | Nyaruguru | n_season_lime | 0.000 | 0.043 | 0.446 |
| 185 | Rutsiro | n_season_fallow | 0.289 | 0.470 | 0.446 |
| 207 | Rwamaga | Total.C | 2.232 | 2.169 | 0.446 |
| 24 | Gatsibo_NLWH | n_season_lime | 0.025 | 0.069 | 0.455 |
| 158 | Nyanza | pH | 5.999 | 6.105 | 0.46 |
| 164 | Nyaruguru | own | 0.935 | 0.848 | 0.46 |
| 188 | Rutsiro | m3.Ca | 685.223 | 629.522 | 0.46 |
| 203 | Rwamaga | n_seasons_leg_2 | 1.685 | 0.536 | 0.46 |
| 174 | Nyaruguru | pH | 5.210 | 5.319 | 0.466 |
| 71 | Karongi | n_season_compost | 6.553 | 7.074 | 0.472 |
| 130 | Nyamasheke | age | 48.123 | 45.898 | 0.499 |
| 156 | Nyanza | m3.Ca | 987.213 | 1083.103 | 0.499 |
| 68 | Karongi | own | 0.981 | 0.957 | 0.499 |
| 58 | Huye | n_seasons_leg_1 | 1.043 | 1.386 | 0.522 |
| 173 | Nyaruguru | m3.Mg | 138.204 | 151.996 | 0.522 |
| 110 | Mugonero | pH | 5.242 | 5.181 | 0.531 |
| 157 | Nyanza | m3.Mg | 210.773 | 230.037 | 0.531 |
| 67 | Karongi | hhsize | 4.912 | 5.172 | 0.536 |
| 175 | Nyaruguru | Total.C | 2.229 | 2.140 | 0.548 |
| 142 | Nyamasheke | pH | 5.123 | 5.179 | 0.568 |
| 3 | Gatsibo_LWH | hhsize | 5.763 | 5.362 | 0.616 |
| 42 | Gisagara | n_seasons_leg_1 | 2.870 | 2.304 | 0.616 |
| 56 | Huye | n_season_lime | 0.113 | 0.035 | 0.616 |
| 204 | Rwamaga | m3.Ca | 1376.064 | 1298.489 | 0.616 |
| 189 | Rutsiro | m3.Mg | 174.684 | 163.413 | 0.616 |
| 165 | Nyaruguru | field.size | 495.239 | 570.022 | 0.621 |
| 4 | Gatsibo_LWH | own | 1.000 | 0.983 | 0.622 |
| 52 | Huye | own | 0.991 | 0.974 | 0.622 |
| 93 | Kayonza | m3.Mg | 357.168 | 336.861 | 0.622 |
| 100 | Mugonero | own | 0.915 | 0.947 | 0.622 |
| 128 | Nyamagabe | Total.N | 0.165 | 0.169 | 0.622 |
| 191 | Rutsiro | Total.C | 2.298 | 2.245 | 0.622 |
| 97 | Mugonero | female | 0.713 | 0.656 | 0.627 |
| 181 | Rutsiro | field.size | 432.241 | 581.313 | 0.63 |
| 79 | Karongi | Total.C | 1.832 | 1.877 | 0.636 |
| 8 | Gatsibo_LWH | n_season_lime | 0.322 | 0.500 | 0.645 |
| 37 | Gisagara | field.size | 559.261 | 493.957 | 0.645 |
| 94 | Kayonza | pH | 6.203 | 6.129 | 0.645 |
| 139 | Nyamasheke | n_seasons_leg_2 | 3.575 | 24.796 | 0.645 |
| 5 | Gatsibo_LWH | field.size | 916.441 | 776.345 | 0.659 |
| 107 | Mugonero | n_seasons_leg_2 | 4.031 | 3.511 | 0.659 |
| 127 | Nyamagabe | Total.C | 2.266 | 2.336 | 0.659 |
| 187 | Rutsiro | n_seasons_leg_2 | 2.584 | 2.048 | 0.659 |
| 206 | Rwamaga | pH | 5.873 | 5.817 | 0.659 |
| 101 | Mugonero | field.size | 393.124 | 441.359 | 0.66 |
| 171 | Nyaruguru | n_seasons_leg_2 | 3.370 | 5.326 | 0.661 |
| 208 | Rwamaga | Total.N | 0.178 | 0.175 | 0.661 |
| 25 | Gatsibo_NLWH | n_season_fallow | 0.543 | 0.736 | 0.667 |
| 129 | Nyamasheke | female | 0.692 | 0.646 | 0.683 |
| 57 | Huye | n_season_fallow | 0.487 | 0.675 | 0.684 |
| 133 | Nyamasheke | field.size | 580.130 | 787.527 | 0.684 |
| 147 | Nyanza | hhsize | 4.891 | 5.261 | 0.684 |
| 192 | Rutsiro | Total.N | 0.159 | 0.157 | 0.684 |
| 172 | Nyaruguru | m3.Ca | 615.230 | 665.044 | 0.687 |
| 144 | Nyamasheke | Total.N | 0.167 | 0.165 | 0.69 |
| 117 | Nyamagabe | field.size | 282.643 | 310.929 | 0.7 |
| 34 | Gisagara | age | 46.848 | 44.761 | 0.703 |
| 10 | Gatsibo_LWH | n_seasons_leg_1 | 4.678 | 4.345 | 0.715 |
| 69 | Karongi | field.size | 446.088 | 490.788 | 0.715 |
| 109 | Mugonero | m3.Mg | 169.468 | 159.707 | 0.715 |
| 178 | Rutsiro | age | 44.765 | 43.542 | 0.715 |
| 77 | Karongi | m3.Mg | 269.452 | 254.229 | 0.715 |
| 21 | Gatsibo_NLWH | field.size | 1459.901 | 1306.069 | 0.739 |
| 55 | Huye | n_season_compost | 4.878 | 5.211 | 0.739 |
| 12 | Gatsibo_LWH | m3.Ca | 1139.996 | 1206.971 | 0.754 |
| 39 | Gisagara | n_season_compost | 3.957 | 4.326 | 0.754 |
| 78 | Karongi | pH | 5.477 | 5.441 | 0.754 |
| 80 | Karongi | Total.N | 0.142 | 0.144 | 0.754 |
| 90 | Kayonza | n_seasons_leg_1 | 2.155 | 1.982 | 0.754 |
| 153 | Nyanza | n_season_fallow | 0.870 | 1.130 | 0.754 |
| 180 | Rutsiro | own | 0.934 | 0.916 | 0.754 |
| 199 | Rwamaga | n_season_compost | 3.748 | 4.018 | 0.754 |
| 106 | Mugonero | n_seasons_leg_1 | 1.186 | 1.336 | 0.764 |
| 19 | Gatsibo_NLWH | hhsize | 5.160 | 5.333 | 0.766 |
| 91 | Kayonza | n_seasons_leg_2 | 1.259 | 1.091 | 0.766 |
| 116 | Nyamagabe | own | 0.982 | 0.964 | 0.766 |
| 169 | Nyaruguru | n_season_fallow | 0.261 | 0.370 | 0.766 |
| 36 | Gisagara | own | 0.870 | 0.826 | 0.767 |
| 9 | Gatsibo_LWH | n_season_fallow | 0.288 | 0.397 | 0.769 |
| 119 | Nyamagabe | n_season_compost | 7.518 | 7.857 | 0.769 |
| 120 | Nyamagabe | n_season_lime | 0.393 | 0.500 | 0.78 |
| 61 | Huye | m3.Mg | 190.397 | 186.200 | 0.789 |
| 141 | Nyamasheke | m3.Mg | 147.629 | 153.726 | 0.789 |
| 53 | Huye | field.size | 567.922 | 599.096 | 0.8 |
| 123 | Nyamagabe | n_seasons_leg_2 | 2.929 | 3.161 | 0.8 |
| 29 | Gatsibo_NLWH | m3.Mg | 254.332 | 246.971 | 0.814 |
| 132 | Nyamasheke | own | 0.945 | 0.932 | 0.814 |
| 140 | Nyamasheke | m3.Ca | 590.765 | 613.771 | 0.814 |
| 167 | Nyaruguru | n_season_compost | 6.043 | 5.696 | 0.814 |
| 149 | Nyanza | field.size | 766.217 | 853.359 | 0.818 |
| 170 | Nyaruguru | n_seasons_leg_1 | 2.348 | 2.043 | 0.819 |
| 154 | Nyanza | n_seasons_leg_1 | 2.283 | 2.543 | 0.828 |
| 197 | Rwamaga | field.size | 842.054 | 892.855 | 0.831 |
| 202 | Rwamaga | n_seasons_leg_1 | 1.568 | 1.682 | 0.831 |
| 126 | Nyamagabe | pH | 5.022 | 5.002 | 0.836 |
| 200 | Rwamaga | n_season_lime | 0.324 | 0.355 | 0.836 |
| 60 | Huye | m3.Ca | 870.054 | 853.743 | 0.847 |
| 62 | Huye | pH | 5.665 | 5.684 | 0.848 |
| 30 | Gatsibo_NLWH | pH | 6.061 | 6.038 | 0.852 |
| 103 | Mugonero | n_season_compost | 6.070 | 6.229 | 0.872 |
| 137 | Nyamasheke | n_season_fallow | 0.664 | 0.599 | 0.872 |
| 76 | Karongi | m3.Ca | 803.893 | 787.032 | 0.89 |
| 16 | Gatsibo_LWH | Total.N | 0.146 | 0.148 | 0.916 |
| 13 | Gatsibo_LWH | m3.Mg | 235.470 | 239.641 | 0.929 |
| 14 | Gatsibo_LWH | pH | 6.119 | 6.138 | 0.929 |
| 15 | Gatsibo_LWH | Total.C | 1.866 | 1.887 | 0.929 |
| 63 | Huye | Total.C | 1.919 | 1.910 | 0.929 |
| 105 | Mugonero | n_season_fallow | 0.915 | 0.863 | 0.929 |
| 121 | Nyamagabe | n_season_fallow | 0.304 | 0.268 | 0.929 |
| 124 | Nyamagabe | m3.Ca | 449.585 | 456.505 | 0.929 |
| 125 | Nyamagabe | m3.Mg | 108.307 | 106.662 | 0.929 |
| 143 | Nyamasheke | Total.C | 2.285 | 2.298 | 0.929 |
| 159 | Nyanza | Total.C | 1.778 | 1.761 | 0.929 |
| 28 | Gatsibo_NLWH | m3.Ca | 1246.188 | 1230.161 | 0.931 |
| 75 | Karongi | n_seasons_leg_2 | 3.956 | 3.816 | 0.931 |
| 195 | Rwamaga | hhsize | 4.982 | 4.927 | 0.931 |
| 83 | Kayonza | hhsize | 5.155 | 5.091 | 0.932 |
| 7 | Gatsibo_LWH | n_season_compost | 5.780 | 5.672 | 0.943 |
| 112 | Mugonero | Total.N | 0.153 | 0.154 | 0.943 |
| 89 | Kayonza | n_season_fallow | 0.483 | 0.455 | 0.952 |
| 160 | Nyanza | Total.N | 0.134 | 0.134 | 0.962 |
| 31 | Gatsibo_NLWH | Total.C | 1.994 | 2.002 | 0.964 |
| 186 | Rutsiro | n_seasons_leg_1 | 3.452 | 3.416 | 0.964 |
| 151 | Nyanza | n_season_compost | 3.326 | 3.261 | 0.966 |
| 32 | Gatsibo_NLWH | Total.N | 0.158 | 0.157 | 0.977 |
| 26 | Gatsibo_NLWH | n_seasons_leg_1 | 1.556 | 1.540 | 0.978 |
| 73 | Karongi | n_season_fallow | 0.843 | 0.834 | 0.978 |
| 74 | Karongi | n_seasons_leg_1 | 2.497 | 2.485 | 0.978 |
| 87 | Kayonza | n_season_compost | 3.534 | 3.564 | 0.978 |
| 111 | Mugonero | Total.C | 2.203 | 2.206 | 0.978 |
| 162 | Nyaruguru | age | 48.304 | 48.457 | 0.978 |
| 40 | Gisagara | n_season_lime | 0.022 | 0.022 | 1 |
| 152 | Nyanza | n_season_lime | 0.000 | 0.000 | NA |
Demographic variables interpretation here.
Agricultural practice variables interpretation here
Soil Variables interpretation here
write.csv(dist.output, file=paste(od, "district balance.csv", sep="/"), row.names=T)
Look at farmers by duration of tenure farming with Tubura. We want to understand, at least with an initial naive baseline sense, what is the cumulative effect of Tubura practices on soil health outcomes?
We will look only at current Tubura farmers and compare first year farmers to farmers with more experience with Tubura.
oafOnly <- d[which(d$client==0 & d$total.seasons>=1),]
for(i in 1:length(soilVars)){
print(
ggplot(oafOnly, aes(x=as.factor(total.seasons), y=oafOnly[,soilVars[i]])) +
geom_boxplot() +
labs(x="Tubura Tenure", y=soilVars[i], title = paste("RW baseline soil by tenure - ", soilVars[i], sep = ""))
)
}
tenureSum <- aggregate(oafOnly[,out.list], by=list(oafOnly$total.seasons), function(x){
round(mean(x, na.rm=T),2)
})
tenureSum <- as.data.frame(t(tenureSum))
colnames(tenureSum) <- c(paste(seq(1,11,1), " seas.", sep = ""), "13 seas.")
print(kable(tenureSum))
| 1 seas. | 2 seas. | 3 seas. | 4 seas. | 5 seas. | 6 seas. | 7 seas. | 8 seas. | 9 seas. | 10 seas. | 11 seas. | 13 seas. | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Group.1 | 1.00 | 2.00 | 3.00 | 4.00 | 5.00 | 6.00 | 7.00 | 8.00 | 9.00 | 10.00 | 11.00 | 13.00 |
| female | 0.70 | 0.61 | 0.67 | 0.65 | 0.64 | 0.58 | 0.75 | 0.33 | 0.33 | 0.75 | 0.67 | 0.00 |
| age | 48.77 | 46.43 | 46.46 | 48.96 | 42.73 | 43.83 | 42.12 | 52.33 | 36.00 | 48.50 | 42.00 | 52.00 |
| hhsize | 5.30 | 5.40 | 5.71 | 4.91 | 6.09 | 5.25 | 5.50 | 5.83 | 6.67 | 6.50 | 8.67 | 6.00 |
| own | 0.92 | 0.97 | 1.00 | 1.00 | 0.91 | 1.00 | 1.00 | 0.83 | 1.00 | 1.00 | 1.00 | 1.00 |
| field.size | 564.72 | 595.76 | 390.17 | 547.13 | 976.18 | 289.17 | 346.75 | 602.33 | 256.00 | 241.00 | 334.67 | 480.00 |
| n_season_fert | 1.03 | 1.27 | 3.17 | 1.65 | 1.82 | 2.83 | 2.38 | 5.33 | 2.67 | 3.75 | 0.33 | 10.00 |
| n_season_compost | 5.48 | 5.61 | 6.58 | 4.43 | 5.82 | 6.50 | 6.75 | 8.33 | 10.00 | 8.25 | 4.67 | 10.00 |
| n_season_lime | 0.27 | 0.17 | 0.25 | 0.09 | 0.18 | 0.33 | 0.00 | 0.17 | 0.00 | 0.00 | 0.00 | 0.00 |
| n_season_fallow | 0.53 | 0.74 | 0.58 | 1.00 | 0.45 | 0.50 | 0.88 | 0.00 | 0.00 | 0.00 | 1.33 | 0.00 |
| n_seasons_leg_1 | 2.12 | 2.14 | 1.96 | 2.00 | 1.55 | 1.25 | 0.62 | 3.00 | 4.00 | 2.25 | 4.33 | 1.00 |
| n_seasons_leg_2 | 3.13 | 2.85 | 4.50 | 2.57 | 6.27 | 2.33 | 5.88 | 3.50 | 3.33 | 5.25 | 0.00 | 9.00 |
| m3.Ca | 1127.45 | 946.96 | 1034.46 | 686.67 | 678.56 | 625.67 | 870.40 | 443.64 | 1153.25 | 483.06 | 746.88 | 632.20 |
| m3.Mg | 247.35 | 226.65 | 252.39 | 165.35 | 153.59 | 138.80 | 217.69 | 120.77 | 307.42 | 153.92 | 177.55 | 224.80 |
| pH | 5.72 | 5.56 | 5.70 | 5.20 | 5.34 | 5.34 | 5.64 | 5.10 | 5.87 | 5.18 | 5.50 | 5.54 |
| Total.C | 2.13 | 2.19 | 2.19 | 2.36 | 2.01 | 2.06 | 1.83 | 2.36 | 1.85 | 2.26 | 1.77 | 2.06 |
| Total.N | 0.16 | 0.16 | 0.16 | 0.17 | 0.15 | 0.15 | 0.14 | 0.16 | 0.15 | 0.15 | 0.13 | 0.16 |
oafOnly$tenured <- ifelse(oafOnly$total.seasons==1,0,1)
tenure <- do.call(rbind, lapply(out.list, function(x) {
out <- t.test(oafOnly[,x] ~ oafOnly[,"tenured"], data=oafOnly)
tab <- data.frame(out[[5]][[1]], out[[5]][[2]], out[3])
tab[,1:2] <- round(tab[,1:2],3)
names(tab) <- c(names(out[[5]]), "pvalue")
return(tab)
}))
# use p.adjust with bonferroni correction
tenure$pvalue <- p.adjust(tenure$pvalue, method="fdr")
rownames(tenure) <- out.list
tenure <- tenure[order(tenure$pvalue),]
tenure$pvalue <- ifelse(tenure[, 3] < 0.001, "< 0.001", round(tenure[, 3], 3))
colnames(tenure) <- c("Non-Tubura", "Tubura Client", "p-value")
print(kable(tenure))
| Non-Tubura | Tubura Client | p-value | |
|---|---|---|---|
| n_season_fert | 1.032 | 1.944 | < 0.001 |
| m3.Ca | 1127.451 | 863.314 | 0.003 |
| pH | 5.724 | 5.492 | 0.003 |
| m3.Mg | 247.352 | 208.608 | 0.033 |
| n_season_lime | 0.272 | 0.162 | 0.105 |
| own | 0.920 | 0.975 | 0.115 |
| female | 0.704 | 0.614 | 0.219 |
| age | 48.768 | 46.213 | 0.235 |
| n_season_compost | 5.480 | 5.914 | 0.56 |
| hhsize | 5.304 | 5.523 | 0.574 |
| n_season_fallow | 0.528 | 0.680 | 0.574 |
| Total.C | 2.127 | 2.170 | 0.602 |
| n_seasons_leg_2 | 3.128 | 3.365 | 0.746 |
| field.size | 564.724 | 540.751 | 0.833 |
| n_seasons_leg_1 | 2.120 | 2.036 | 0.833 |
| Total.N | 0.160 | 0.160 | 0.835 |
Tubura tenure is being defined here as having more than 1 year experience farming with Tubura.
Demographic variables We are well balanced along demographic variables.
Agricultural practice variables Not surprisingly, Tubura farmers have more cumulative years of fertilizer use than current non-Tubura farmers. While that difference is signficant, it is realistically only a single season of fertilizer use different.
Interestingly, non-Tubura farmers reported using more lime than current Tubura farmers. This
Soil Variables Soil pH, calcium and magnesium levels are lower for tenured Tubura farmers. This is consistent with the hypothesis that increaesd fertilizer use leads to an increaese in soil acidity.
Here’s where we’ll look at the contribution of fertilizer, lime and cultivation practices on soil health outcomes. This analysis will be come richer as we gain longitudinal measures. I caution that we cannot treat these relationships as causal. The direction of causality is not clearly delineated in the data or the study design. However, we can identify meaningful connections between practices and outcomes through this analysis to generate new hypotheses for field testing.
#crdref <- CRS('+proj=longlat +datum=WGS84')
crdref <- CRS('+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0')
e <- d[!is.na(d$lon),]
ss <- SpatialPointsDataFrame(coords = e[, c("lon", "lat")], data=e, proj4string = crdref)
rw <- getData("GADM", country='RW', level=3, path = "/Users/mlowes/drive/soil health study/data") # the higher the number, the higher the resolution
#ext.rw.ss matches points with spatial polygons in rw
ext.rw.ss <- extract(rw[, "NAME_3"], ss)
## Loading required namespace: rgeos
ss$spatialname <- ext.rw.ss$NAME_3
frw <- fortify(rw, region="NAME_3")
ss.soil <- aggregate(ss@data[,soilVars], by=list(ss@data$spatialname), function(x){
mean(x, na.rm=T)
})
plotReady <- dplyr::left_join(frw, ss.soil, by=c("id"="Group.1"))
Consider revising these maps to a smaller greographic unit. Add the name of the location for uninitiated users.
library(RColorBrewer)
mapList <- list()
for(i in 1:length(soilVars)){
mapRes <- ggplot(plotReady, aes(x=long, y=lat, group=group)) + geom_path() +
geom_polygon(aes(fill=plotReady[,soilVars[i]])) +
scale_fill_gradientn(colours = rev(brewer.pal(9,"Reds")), # define colors
name = soilVars[i],
guide = guide_colorbar(legend.direction = "vertical")) +
theme_bw() +
labs(title=paste("Rwanda long term soil health baseline - 2016", soilVars[i], sep= " "), x = "Longitude", y="Latitude")
mapList[[i]] <- mapRes
print(mapRes)
}
pdf(file=paste(md, "rw_shs_baseline_soil_maps.pdf", sep = "/"), width=11, height=8.5)
for(i in 1:length(mapList)){
print(mapList[[i]])
}
dev.off()
## quartz_off_screen
## 2
Interpolate soil health values for full operating area using soil health study values. We want to eventually add all Rwandan soil values into a single dataset to update and hone these values.
suppressMessages(library(fields))
library(gstat)
r <- raster(res=1/12)
r <- crop(r, floor(extent(rw)))
soilInterpolation <- lapply(soilVars, function(x) {
m <- Tps(coordinates(ss), ss@data[,x])
# make raster layer with model, raster is rwanda empty raster, model is m
tps <- interpolate(r, m)
tps <- crop(tps, rw)
tps <- mask(tps, rw) # cuts the tps raster down to the rw boundaries
x <- gsub("m3.", "", x)
outPlot <- invisible(print(
plot(tps, main= paste("Soil TPS Interpolation ", x, sep="- ")),
plot(rw, add=T, na.print=NULL)
))
return(outPlot)
})
## NULL
## NULL
## NULL
## NULL
## NULL
pdf(file=paste(md, "rw_shs_bl_interpolation_soil_vars.pdf", sep = '/'), width=11, height=8.5)
for(i in 1:length(soilInterpolation)){
invisible(print(soilInterpolation[[i]]))
}
## NULL
## NULL
## NULL
## NULL
## NULL
dev.off()
## quartz_off_screen
## 2
we can look at this when we have paired yield and soil data
–end